

library(haven)
library(readr)
library(data.table)
library(scales)
library(dplyr)
library(lfe)
library(stargazer)
cohort <- fread("H:/Zheng_10223/Joint/cohort2025_connolly.csv")

############################
# cut to 5 percentiles
cohort$MainParent_Income_HH_MainParentAge45_49_ventparent=cut(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent, breaks=seq(0,100,5), labels=c(5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100))


#######################################################
model_refugees=lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent,data=cohort[cohort$ImmigrationCategory=="Refugee",])
stargazer(model_refugees)


model_nonrefugees=lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent,data=cohort[cohort$ImmigrationCategory!="Refugee",])
stargazer(model_nonrefugees)


# and then look at the gap in mobility

model_bothgroups=lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent*factor(Status_Refugee),data=cohort)
stargazer(model_refugees, model_nonrefugees, model_bothgroups,add.lines=list(c("Sample","Refugees","Non-Refugees","Both")), out="H:/Zheng_10223/ToVet/Output/connolly_table.tex")


model_bothgroups=lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent*factor(Status_Refugee),data=cohort)
stargazer(model_refugees, model_nonrefugees, model_bothgroups,add.lines=list(c("Sample","Refugees","Non-Refugees","Both")), out="H:/Zheng_10223/ToVet/Output/connolly_table.txt")


#### FIGURE 2: BINSCATTER ####
par(mar=c(4.5,4.7,2,2))
plot(cohort[!ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"], pch=19, col="black", cex=1.2, xlab="Parent Rank", ylab="Child Rank", ylim=c(40,80), cex.lab=1.5, cex.axis=1.5)
points(cohort[ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"], pch=18, col="forestgreen", cex=1.5)
legend(x=60, y=48, pch=c(19,18), col=c("black", "forestgreen"), title="Immigrant Category                              ", cex=1.3, bty="n",
       c(paste("Non-Refugee; IGM=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=cohort[!ImmigrationCategory=="Refugee"])$coef[2],3), sep=""),
         paste("Refugee & Humanitarian; IGM=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=cohort[ImmigrationCategory=="Refugee"])$coef[2],3), sep="")))
abline(lm(cohort[!ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"][,c(2,1)]), col="black", lwd=2)
abline(lm(cohort[ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"][,c(2,1)]), col="forestgreen", lwd=2)

# with Connolly 
plot(cohort[!ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"], pch=19, col="black", cex=1.2, xlab="Parent Rank", ylab="Child Rank", ylim=c(35,80), cex.lab=1.5, cex.axis=1.5)
points(cohort[ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"], pch=18, col="forestgreen", cex=1.5)
legend(x=60, y=45, pch=c(19,18,NA), lty=c(1,1,2), col=c("black", "forestgreen", "grey60"), title="Immigrant Category                            ", cex=1.3, bty="n",
       lwd=c(2,2,2),c(paste("Non-Refugee; IGM=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=cohort[!ImmigrationCategory=="Refugee"])$coef[2],3), sep=""),
                      paste("Refugee & Humanitarian; IGM=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=cohort[ImmigrationCategory=="Refugee"])$coef[2],3), sep=""),
                      "Canadian-Born; IGM=0.286"))
abline(lm(cohort[!ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"][,c(2,1)]), col="black", lwd=2)
abline(lm(cohort[ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"][,c(2,1)]), col="forestgreen", lwd=2)
abline(a=36.536, b=0.286, col="grey60", lwd=2, lty=2)

# counts : 
dfrefugees=cohort %>% filter(ImmigrationCategory=="Refugee") %>% group_by(MainParent_Income_HH_MainParentAge45_49_pctparent) %>% summarize(count=n())
write.csv(dfrefugees,"H:/Zheng_10223/ToVet/Supporting/Figure1_connolly_refugees.csv")



dfnonrefugees=cohort %>% filter(ImmigrationCategory!="Refugee") %>% group_by(MainParent_Income_HH_MainParentAge45_49_pctparent) %>% summarize(count=n())
write.csv(dfnonrefugees,"H:/Zheng_10223/ToVet/Supporting/Figure1_connolly_nonrefugees.csv")

#
